package indexer.processer

import us.codecraft.webmagic.Site
import us.codecraft.webmagic.selector.Selectable

/**
 * Page processor for quanshuwu.com.
 *
 * For every downloaded page it queues all links that stay on the quanshuwu.com domain, and,
 * when the page contains a `div#bookinfo` element, emits the fields `name`, `url` and `author`.
 * Pages without that element (or without a readable title) are marked as skipped.
 */
class QuanshuwuPageProcessor : UrlPageProcessor {
    /** Base URL of the site handled by this processor. */
    override fun getUrl(): String = "http://www.quanshuwu.com"

    /**
     * Crawl settings for this site: 3 retries, 3 cycle retries, and sleep / timeout / retry-sleep
     * values of 1000 / 60000 / 10000 (presumably milliseconds, per the WebMagic `Site` API).
     */
    override fun getSite(): Site =
        Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setCycleRetryTimes(3)
            .setTimeOut(60_000)
            .setRetrySleepTime(10_000)

    /**
     * Queues same-site links found on [page] and extracts the book fields when present.
     *
     * The page is skipped when `div#bookinfo` is missing or when the title node inside it cannot
     * be read. A missing or too-short author text yields an empty `author` field instead of an
     * exception.
     */
    override fun process(page: us.codecraft.webmagic.Page) {
        val html = page.html

        page.addTargetRequests(html.links().regex(SITE_LINK_PATTERN).all())

        val mainInfo: Selectable = html.xpath("//div[@id='bookinfo']")

        // Selectable.get() is a Java API that returns null when nothing matched, so the results
        // are typed as nullable here rather than leaking platform types.
        val name: String? =
            if (mainInfo.match()) mainInfo.xpath("//div[@class='ti']/h1[1]/text()").get() else null

        if (name == null) {
            // Not a book page, or the title is missing: nothing usable to emit.
            page.setSkip(true)
            return
        }

        val author: String? = mainInfo.xpath("//div[@class='ti']/p[1]/text()").get()

        page.putField("name", name.trim())
        page.putField("url", page.request.url)
        // drop() never throws on short input, unlike substring(); the first characters are
        // presumably a fixed label preceding the author name - confirm against the site markup.
        page.putField("author", author.orEmpty().drop(AUTHOR_PREFIX_LENGTH))
    }

    private companion object {
        /** Matches http(s) links on any quanshuwu.com subdomain; the dots are literal. */
        const val SITE_LINK_PATTERN = "https?://.*\\.quanshuwu\\.com.*"

        /** Number of leading characters removed from the raw author text. */
        const val AUTHOR_PREFIX_LENGTH = 3
    }
}